library(ggplot2)
# install.packages('pacman')
pacman::p_load(forecast)
dev.new(width=12, height=5)
knitr::opts_chunk$set(fig.width=12, fig.height=5)
# install.packages('tidyverse')
df_train = read.csv('data/train.csv')
df_train$Date = as.Date(df_train$Date)
df_test = read.csv('data/test.csv')
df_store = read.csv('data/store.csv')
head(df_store)
head(df_train)
head(df_test)
min(df_store[!is.na(df_store$CompetitionDistance),'CompetitionDistance'])
## [1] 20
df_store[df_store$CompetitionDistance==20,]
stores_test = unique(df_test$Store)
#Filtramos del DF las stores de test
df_train_filtered = df_train[df_train$Store %in% stores_test, ]
head(df_train_filtered)
df_train_tienda_1 = df_train_filtered[df_train_filtered$Store==1,]
head(df_train_tienda_1)
df_train_tienda_3 = df_train_filtered[df_train_filtered$Store==3,]
head(df_train_tienda_3)
nrow(df_train_filtered[(df_train_filtered$DayOfWeek==7),])
## [1] 110024
nrow(df_train_filtered[(df_train_filtered$DayOfWeek==7)&(df_train_filtered$Sales==0),])
## [1] 107075
df_train_tienda_1$ventas_clientes = convolve(df_train_tienda_1$Sales, df_train_tienda_1$Customers, conj = TRUE)
ggplot(df_train_tienda_1, aes(x = Date, y = ventas_clientes)) +
geom_line(color="green", size = 1.2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Cantidad de ventas a lo largo de los últimos años")
ggCcf(df_train_tienda_1$Customers, df_train_tienda_1$Sales,lag.max = 300)
# Autocorrelación de la serie de ventas
ggAcf(df_train_tienda_1$Sales, lag.max = length(df_train_tienda_1$Sales))
# Correlación cruzada tienda 1 y 3
ggplot(df_train_tienda_1, aes(x = Date, y = Sales)) +
geom_line(color="red", size = 1.2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Ventas Tienda 1") +
xlab("Fecha") +
ylab("Ventas")
ggCcf(df_train_tienda_1$Sales,df_train_tienda_3$Sales)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(feasts)
## Loading required package: fabletools
##
## Attaching package: 'fabletools'
## The following objects are masked from 'package:forecast':
##
## accuracy, forecast
y <- tsibble(
Date = df_train_tienda_1$Date,
Sales = df_train_tienda_1$Sales,
index = Date
)
autoplot(y,Sales) +
labs(title = "Ventas diarias Tienda 1",
y = "Ventas", x='Fecha')
#Filtrando cuando se encuentra cerrado
melsyd_economy <- df_train_filtered %>%
filter(Open == 1, Store == 1) %>%
as_tsibble(index = Date)
autoplot(melsyd_economy, Sales) +
labs(title = "Ventas - Domingos y feriados filtrados")
melsyd_economy <- df_train_filtered %>%
filter(Store == 1) %>%
mutate(Sales = ifelse(Open == 1, Sales, NA),
Customers = ifelse(Open == 1, Customers, NA),
Promo = ifelse(Open == 1, Promo, NA),
StateHoliday = ifelse(Open == 1, StateHoliday, NA),
SchoolHoliday = ifelse(Open == 1, SchoolHoliday, NA),
) %>%
as_tsibble(index = Date)
melsyd_economy %>%
gg_season(Sales, labels = "both") +
labs(title = "Seasonal plot")
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_text).
melsyd_economy %>% gg_season(Sales, period = "week") +
theme(legend.position = "none") +
labs(title="Weekly Seasonality")
## Warning: Removed 141 row(s) containing missing values (geom_path).
ggplot(melsyd_economy %>%
filter(Open==1) %>%
mutate(DayOfWeek = as.factor(DayOfWeek)), aes(x= DayOfWeek, y=Sales, color = DayOfWeek)) +
geom_boxplot()+
labs(title = "Ventas diarias Tienda 1",
y = "Ventas", x='Día de la semana')
# La mediana y los extramos de la caja se comportan de manera distinta dependiendo el día de la semana
melsyd_economy %>% gg_season(Sales, period = "year") +
theme(legend.position = "none") +
labs(title="Year Seasonality")
## Warning: Removed 3 row(s) containing missing values (geom_path).
#Subseries Plot, estacionalidad dentro de la semana
melsyd_economy %>%
gg_subseries(Sales,period = 'week') +
labs(title = "Ventas por día de la semana en Tienda 1",
y = "Ventas", x='Día de la semana')
## Warning: Removed 134 row(s) containing missing values (geom_path).
## Warning: Removed 134 rows containing missing values (geom_hline).
melsyd_economy %>%
gg_subseries(Sales,period = 'month') +
labs(title = "Ventas por día del mes en Tienda 1",
y = "Ventas", x='Día del mes')
## Warning: Removed 1 row(s) containing missing values (geom_path).
ggplot(melsyd_economy %>%
filter(Open==1) %>%
mutate(DayOfMonth = as.factor(lubridate::day(Date))), aes(x= DayOfMonth, y=Sales, color = DayOfMonth)) +
geom_boxplot()+
labs(title = "Ventas por día del mes para la Tienda 1",
y = "Ventas", x='Día del mes')
ggplot(melsyd_economy %>%
filter(Open==1) %>%
mutate(DayToEndOfMonth = as.factor(lubridate::days_in_month(Date) - lubridate::day(Date))), aes(x= DayToEndOfMonth, y=Sales, color = DayToEndOfMonth)) +
geom_boxplot()
# Cuando faltan pocos días para que termine el mes la venta es alta.
ggplot(melsyd_economy %>%
mutate(Abierto_Dia_Anterior = as.factor(lag(Open))) %>%
filter(Open==1) , aes(x= Abierto_Dia_Anterior, y=Sales, color = Abierto_Dia_Anterior)) +
geom_boxplot()+
labs(title = "Ventas diarias Tienda 1",
y = "Ventas", x='Se encontraba abierto el día anterior?')
ggplot(melsyd_economy %>%
mutate(Abierto_Dia_Anterior = as.factor(lag(Open))) %>%
filter(Open==1, DayOfWeek!=1) , aes(x= Abierto_Dia_Anterior, y=Sales, color = Abierto_Dia_Anterior)) +
geom_boxplot()
# Boxplots Efecto de vacaciones de escuela
ggplot(melsyd_economy %>%
filter(Open==1) %>%
mutate(SchoolHoliday = as.factor(SchoolHoliday)), aes(x= SchoolHoliday, y=Sales, color = SchoolHoliday)) +
geom_boxplot()+
labs(title = "Ventas diarias Tienda 1",
y = "Ventas", x='Ese día es vacaciones escolares?')
# La mediana parece ser más baja, no obstante hay mucho solapamiento
melsyd_economy %>%
autoplot(Customers) +
labs(title = "Clientes diarios Tienda 1",
y = "Clientes", x='Fecha')
## Warning: Removed 1 row(s) containing missing values (geom_path).
melsyd_economy %>%
ggplot(aes(x = Customers, y = Sales)) +
geom_point()
## Warning: Removed 161 rows containing missing values (geom_point).
cor(df_train_tienda_1$Customers, df_train_tienda_1$Sales)
## [1] 0.9843412
cor(df_train_tienda_1$Customers, df_train_tienda_1$Sales, method = 'spearman')
## [1] 0.9554348
melsyd_economy %>%
gg_lag(Sales, geom = "point") +
labs(x = "lag(Sales, k)")
## Warning: Removed 161 rows containing missing values (gg_lag).
melsyd_economy %>%
gg_lag(Sales, geom = "point", lags = c(1,2,3,7,14,30,365) ) +
labs(x = "lag(Sales, k)")
## Warning: Removed 161 rows containing missing values (gg_lag).
melsyd_economy %>%
ACF(Sales, lag_max = 40, na.action = na.pass) %>%
autoplot() + labs(title="Sales")
melsyd_economy %>%
ACF(Sales, lag_max = 400, na.action = na.pass) %>%
autoplot() + labs(title="Sales")
# Para lags pequeños tenemos una alta correlación, cada 7 días se ven picos, lo que indica que hay cierta estacionalidad # También vemos un pico grande aproximadamente al año # Cuando hay tendencia, suele haber una autocorrelación alta y positiva en los lags pequeños, valores cercanos en tiempo, cercanos también en valor # Cuando la data es estacional, las autocorrelaciones son altas para los lags estacionales (en múltiplos de dichos lags)
melsyd_economy_2 <- df_train_filtered %>%
filter(Store == 1) %>%
# mutate(Sales = ifelse(Open == 1, Sales, NA),
# Customers = ifelse(Open == 1, Customers, NA),
# Promo = ifelse(Open == 1, Promo, NA),
# StateHoliday = ifelse(Open == 1, StateHoliday, NA),
# SchoolHoliday = ifelse(Open == 1, SchoolHoliday, NA),
# ) %>%
as_tsibble(index = Date)
dcmp <- melsyd_economy_2 %>%
model(stl = STL(Sales))
components(dcmp)
melsyd_economy_2 %>% autoplot(Sales)
components(dcmp) %>% autoplot()
components(dcmp) %>%
as_tsibble() %>%
autoplot(Sales, colour="gray") +
geom_line(aes(y=trend), colour = "#D55E00") +
labs(
title = "Sales"
)
components(dcmp) %>%
as_tsibble() %>%
autoplot(Sales, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(title = "Ventas diarias Tienda 1 <estacionalidad ajustada>",
y = "Ventas", x='Fecha')
# Considerando la serie completa
melsyd_economy_3 <- melsyd_economy_2 %>%
mutate(
`7-MA` = slider::slide_dbl(Sales, mean,
.before = 3, .after = 3, .complete = TRUE)
)
melsyd_economy_3 %>%
autoplot(Sales) +
geom_line(aes(y = `7-MA`), colour = "#D55E00") +
labs(y = "Ventas",
title = "Ventas diarias Tienda 1") +
guides(colour = guide_legend(title = "series"))
## Warning: Removed 6 row(s) containing missing values (geom_path).
# Filtrando cuando se encuentra cerrado
melsyd_economy_3 <- melsyd_economy %>%
mutate(
`7-MA` = slider::slide_dbl(Sales, mean,
.before = 3, .after = 3, .complete = TRUE)
)
melsyd_economy_3 %>%
autoplot(Sales) +
geom_line(aes(y = `7-MA`), colour = "#D55E00") +
labs(y = "Ventas",
title = "Ventas diarias Tienda 1") +
guides(colour = guide_legend(title = "series"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 942 row(s) containing missing values (geom_path).
melsyd_economy_4 <- melsyd_economy_2 %>%
mutate(
`12-MA` = slider::slide_dbl(Sales, mean,
.before = 3, .after = 3, .complete = TRUE),
`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 26, .after = 26, .complete = TRUE)
)
melsyd_economy_4 %>%
autoplot(Sales, colour = "gray") +
geom_line(aes(y = `2x12-MA`), colour = "#D55E00") +
labs(y = "Ventas",
title = "Ventas diarias Tienda 1 <tendencia como suma de dos filtros MA>")
## Warning: Removed 58 row(s) containing missing values (geom_path).
melsyd_economy_2 %>%
model(
classical_decomposition(Sales, type = "additive")
# classical_decomposition(Sales~season(365), type = "additive")
) %>%
components() %>%
autoplot() +
labs(title = "Descomposición lineal aditiva para las ventas Tienda 1")
## Warning: Removed 3 row(s) containing missing values (geom_path).
# install.packages("seasonal")
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
# No funca
# x11_dcmp <- melsyd_economy_2[,c('Date','Sales')] %>%
# model(x11 = X_13ARIMA_SEATS(Sales ~ x11(na.action = seasonal::na.x13))) %>%
# components()
# autoplot(x11_dcmp) +
# labs(title =
# "Decomposition of total US retail employment using X-11.")
#
# sum(is.na(melsyd_economy_2$Sales))
melsyd_economy_2 %>%
model(
STL(Sales ~ trend(window = 360) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()+
labs(title = "Descomposición STL para las ventas Tienda 1")
# STL Features
melsyd_economy_2 %>%
features(Sales, feat_stl)
df_train_tienda_1_train <- melsyd_economy_2 %>%
filter(Date<'2015-01-01')
# install.packages('fable')
library(fable)
df_train_tienda_1_train %>% model(MEAN(Sales))
df_train_tienda_1_train %>% model(NAIVE(Sales))
df_train_tienda_1_train %>% model(SNAIVE(Sales ~ lag("year")))
## Warning: Non-integer lag orders for random walk models are not supported.
## Rounding to the nearest integer.
df_train_tienda_1_train %>% model(SNAIVE(Sales ~ lag("week")))
df_train_tienda_1_train %>% model(RW(Sales ~ drift()))
# Set training data from 1992 to 2006
# train <- aus_production %>%
# filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- df_train_tienda_1_train %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales)
)
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 42)
# Plot forecasts against actual values
beer_fc %>%
autoplot(df_train_tienda_1_train, level = NULL) +
autolayer( df_train_tienda_1_train %>%
filter(Date>="2015-01-01") %>%
select(Sales),
colour = "black"
) +
labs(
title = "Forecasts para las ventas Tienda 1"
) +
guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = Sales`
# Set training data from 1992 to 2006
# train <- aus_production %>%
# filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- melsyd_economy %>%
model(
# Actual_Sales = Sales,
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve (week)` = SNAIVE(Sales ~ lag("week")),
`Seasonal naïve (year)` = SNAIVE(Sales ~ lag("year")),
)
## Warning: Non-integer lag orders for random walk models are not supported.
## Rounding to the nearest integer.
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 42)
## Warning: Non-integer lag orders for random walk models are not supported.
## Rounding to the nearest integer.
# Plot forecasts against actual values
beer_fc %>%
autoplot(melsyd_economy
# %>% filter(Date>="2014-10-20")
, level = NULL) +
autolayer( melsyd_economy %>%
filter(Date>="2015-01-01") %>%
select(Sales),
colour = "black"
) +
labs(
title = "Forecasts para las ventas Tienda 1"
) +
guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = Sales`
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 row(s) containing missing values (geom_path).
# Set training data from 1992 to 2006
# train <- aus_production %>%
# filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- melsyd_economy %>%
filter(Date<"2015-01-01") %>%
model(
# Actual_Sales = Sales,
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve (week)` = SNAIVE(Sales ~ lag("week")),
# `Seasonal naïve (year)` = SNAIVE(Sales ~ lag("year")),
)
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 42)
# Plot forecasts against actual values
beer_fc %>%
autoplot(melsyd_economy
%>% filter(Date>="2014-10-20")
, level = NULL) +
autolayer( melsyd_economy %>%
filter(Date>="2015-01-01") %>%
select(Sales),
colour = "black"
) +
labs(
title = "Forecasts para las ventas Tienda 1"
) +
guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = Sales`
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 1 row(s) containing missing values (geom_path).
df_train_tienda_1 <- df_train_tienda_1 %>%
as_tsibble(index = Date)
# Set training data from 1992 to 2006
# train <- aus_production %>%
# filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- df_train_tienda_1 %>%
filter(Date<"2015-01-01") %>%
model(
# Actual_Sales = Sales,
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve (week)` = SNAIVE(Sales ~ lag("week")),
# `Seasonal naïve (year)` = SNAIVE(Sales ~ lag("year")),
)
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 42)
# Plot forecasts against actual values
beer_fc %>%
autoplot(df_train_tienda_1
%>% filter(Date>="2014-10-20")
, level = NULL) +
autolayer( df_train_tienda_1 %>%
filter(Date>="2015-01-01") %>%
select(Sales),
colour = "black"
) +
labs(
title = "Forecasts para las ventas Tienda 1"
) +
guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = Sales`
augment(beer_fit)
aug <- df_train_tienda_1 %>%
model(SNAIVE(Sales ~ lag("week"))) %>%
augment()
autoplot(aug, .innov) +
labs(title = "Residuos del método naïve")
## Warning: Removed 7 row(s) containing missing values (geom_path).
aug %>%
ggplot(aes(x = .innov)) +
geom_histogram() +
labs(title = "Histograma de los residuos")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 7 rows containing non-finite values (stat_bin).
aug %>%
ACF(.innov) %>%
autoplot() +
labs(title = "Residuos del método naïve")
# Se pueden ver algunas autocorrelaciones altas.
df_train_tienda_1 %>%
model(SNAIVE(Sales ~ lag("week"))) %>%
gg_tsresiduals()
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing non-finite values (stat_bin).
df_train_tienda_1 %>%
model(SNAIVE(Sales ~ lag("week"))) %>%
forecast(h = 42) %>%
autoplot(df_train_tienda_1) +
labs(title="Forecast de Ventas <intervalos de predicción SNAIVE>", y="$US" )
melsyd_economy %>%
model(SNAIVE(Sales ~ lag("week"))) %>%
forecast(h = 42) %>%
autoplot(melsyd_economy) +
labs(title="Forecast de ventas <Intervalos de predicción SNAIVE>", y="Ventas" )
## Warning: Removed 1 row(s) containing missing values (geom_path).
melsyd_economy %>%
model(NAIVE(Sales)) %>%
forecast(h = 42) %>%
autoplot(melsyd_economy) +
labs(title="Forecast de ventas <Intervalos de predicción SNAIVE>", y="Ventas" )
## Warning: Removed 1 row(s) containing missing values (geom_path).
recent_production <- melsyd_economy %>%
slice(n()-42:0)
beer_train <- melsyd_economy %>%
slice(1:(n()-43))
beer_fit <- beer_train %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales),
Drift = RW(Sales ~ drift())
)
beer_fc <- beer_fit %>%
forecast(h = 42)
beer_fc %>%
autoplot(
melsyd_economy %>% slice(n()-134:0),
level = NULL
) +
labs(title="Forecast de ventas", y="Ventas" ) +
guides(colour = guide_legend(title = "Forecast"))
accuracy(beer_fc, recent_production)
recent_production <- melsyd_economy_2 %>%
slice(n()-42:0)
beer_train <- melsyd_economy_2 %>%
slice(1:(n()-43))
beer_fit <- beer_train %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales),
Drift = RW(Sales ~ drift())
)
beer_fc <- beer_fit %>%
forecast(h = 42)
beer_fc %>%
autoplot(
melsyd_economy_2 %>% slice(n()-134:0),
level = NULL
) +
labs(title="Forecast de ventas", y="Ventas" ) +
guides(colour = guide_legend(title = "Forecast"))
accuracy(beer_fc, recent_production)
# Time series cross-validation accuracy
google_2015_tr <- melsyd_economy_2 %>%
stretch_tsibble(.init = 731, .step = 42) %>% # Toma al menos los primeros dos años y va tomando de a 6 semanas para testear
relocate(Date, .id)
google_2015_tr
rbind(
# TSCV accuracy
google_2015_tr %>%
model(RW(Sales ~ drift())) %>%
forecast(h = 1) %>%
accuracy(melsyd_economy_2)
,
# Training set accuracy
melsyd_economy_2 %>%
model(RW(Sales ~ drift())) %>%
accuracy()
)
# TSCV accuracy
rbind(
google_2015_tr %>%
model(SNAIVE(Sales)) %>%
forecast(h = 42) %>%
accuracy(melsyd_economy_2)
,
# Training set accuracy
melsyd_economy_2 %>%
model(SNAIVE(Sales)) %>%
accuracy()
)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 41 observations are missing between 2015-08-01 and 2015-09-10
fit_beer <- recent_production %>%
model(TSLM(Sales ~ trend() + season()))
report(fit_beer)
## Series: Sales
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -1046.43 -515.83 11.41 450.16 1420.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4029.557 346.809 11.619 1.45e-13 ***
## trend() 13.787 8.445 1.633 0.112
## season()week2 -73.866 379.883 -0.194 0.847
## season()week3 -96.233 395.873 -0.243 0.809
## season()week4 -4312.186 395.061 -10.915 8.16e-13 ***
## season()week5 499.527 394.429 1.266 0.214
## season()week6 119.907 393.976 0.304 0.763
## season()week7 -123.713 393.705 -0.314 0.755
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 681.8 on 35 degrees of freedom
## Multiple R-squared: 0.8627, Adjusted R-squared: 0.8353
## F-statistic: 31.43 on 7 and 35 DF, p-value: 2.783e-13
melsyd_economy_rl = melsyd_economy_2 %>%
filter(Date<"2015-06-19")
melsyd_economy_rl$DayOfWeek = as.factor(melsyd_economy_rl$DayOfWeek)
fit_beer <- melsyd_economy_rl %>%
model(TSLM(Sales ~ Open * (DayOfWeek + Date) - Open - DayOfWeek - Date))
# model(TSLM(Sales ~ Date))
report(fit_beer)
## Series: Sales
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -2366.6 -611.4 0.0 403.3 4298.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.155e-12 7.237e+01 0.000 1.00000
## Open:DayOfWeek1 1.118e+04 2.066e+03 5.412 8.03e-08 ***
## Open:DayOfWeek2 1.068e+04 2.066e+03 5.170 2.89e-07 ***
## Open:DayOfWeek3 1.056e+04 2.066e+03 5.109 3.96e-07 ***
## Open:DayOfWeek4 1.044e+04 2.065e+03 5.057 5.16e-07 ***
## Open:DayOfWeek5 1.073e+04 2.064e+03 5.199 2.49e-07 ***
## Open:DayOfWeek6 1.096e+04 2.065e+03 5.306 1.41e-07 ***
## Open:DayOfWeek7 NA NA NA NA
## Open:Date -3.703e-01 1.277e-01 -2.900 0.00382 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 900.9 on 891 degrees of freedom
## Multiple R-squared: 0.8047, Adjusted R-squared: 0.8031
## F-statistic: 524.3 on 7 and 891 DF, p-value: < 2.22e-16
recent_production <- melsyd_economy_2 %>%
slice(n()-42:0)
recent_production$DayOfWeek = as.factor(recent_production$DayOfWeek)
fc_beer <- forecast(fit_beer, new_data = recent_production)
## Warning: prediction from a rank-deficient fit may be misleading
fc_beer %>%
autoplot(recent_production) +
labs(
title = "Forecast de ventas usando regresión"
)
accuracy(fc_beer, recent_production)
# # install.packages('forecast')
# library(Rcpp)
# library(forecast)
# # Estimate parameters
fit <- melsyd_economy_2 %>%
filter(Date<"2015-06-19") %>%
model(fable::ETS(Sales ~ error("A") + trend("N") + season("N")))
fc <- fit %>%
forecast(h = 42)
fc %>%
autoplot(melsyd_economy_2 %>%
slice(n()-126:0)) +
labs(title="Exponential Smoothing") +
guides(colour = "none")
accuracy(fc, recent_production )
aus_holidays <- melsyd_economy_2 %>%
filter(Date<"2015-06-19") %>%
summarise(Sales = sum(Sales)/1e3)
fit <- aus_holidays %>%
model(
additive = ETS(Sales ~ error("A") + trend("A") +
season("A")),
multiplicative = ETS(Sales ~ error("M") + trend("A") +
season("M"))
)
fc <- fit %>% forecast(h = 42)
fc %>%
autoplot(aus_holidays %>%
slice(n()-126:0), level = NULL) +
labs(title="Sales <Estacionalidad Holts Winters> - Additive and Multiplicative") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fc, recent_production %>%
mutate(Sales = Sales) )
sth_cross_ped <- melsyd_economy_2 %>%
summarise(Sales = sum(Sales)/1e3)
sth_cross_ped %>%
filter(Date<"2015-06-19") %>%
model(
hw = ETS(Sales ~ error("M") + trend("Ad") + season("M"))
) %>%
forecast(h = 42) %>%
autoplot(sth_cross_ped %>% slice(n()-126:0)) +
labs(title = "Estacionalidad Holts - Winters Multiplicativo")
dcmp <- melsyd_economy_2 %>%
summarise(Sales = sum(Sales)/1e3) %>%
model(
hw = ETS(Sales ~ error("M") + trend("Ad") + season("M"))
)
components(dcmp)
melsyd_economy_2 %>% autoplot(Sales)
components(dcmp) %>% autoplot()
## Warning: Removed 7 row(s) containing missing values (geom_path).
dcmp <- melsyd_economy_2 %>%
summarise(Sales = sum(Sales)/1e3) %>%
model(
hw = ETS(Sales ~ error("A") + trend("Ad") + season("A"))
)
components(dcmp)
melsyd_economy_2 %>% autoplot(Sales)
components(dcmp) %>% autoplot()
## Warning: Removed 7 row(s) containing missing values (geom_path).
melsyd_economy_2 %>%
mutate(diff_sales = difference(Sales)) %>%
features(diff_sales, ljung_box, lag = 10)
melsyd_economy_2 %>%
summarise(Sales = sum(Sales)/1e6) %>%
transmute(
`Sales ($million)` = Sales,
`Log sales` = log(Sales),
`Annual change in log sales` = difference(log(Sales), 365),
`Doubly differenced log sales` =
difference(difference(log(Sales), 365), 1)
) %>%
pivot_longer(-Date, names_to="Type", values_to="Sales") %>%
mutate(
Type = factor(Type, levels = c(
"Sales ($million)",
"Log sales",
"Annual change in log sales",
"Doubly differenced log sales"))
) %>%
ggplot(aes(x = Date, y = Sales)) +
geom_line() +
facet_grid(vars(Type), scales = "free_y") +
labs(title = "Corticosteroid drug sales", y = NULL)
melsyd_economy_2 %>%
features(Sales, unitroot_kpss)
melsyd_economy_2 %>%
mutate(diff_close = difference(Sales, 365)) %>%
features(diff_close, unitroot_kpss)
melsyd_economy_2 %>%
mutate(diff_close = difference(Sales, 7)) %>%
features(diff_close, unitroot_kpss)
melsyd_economy_2 %>%
features(Sales, unitroot_ndiffs)
melsyd_economy_2 %>%
gg_tsdisplay(difference(Sales, 365),
plot_type='partial', lag=36) +
labs(title="Seasonally differenced", y="")
## Warning: Removed 365 row(s) containing missing values (geom_path).
## Warning: Removed 365 rows containing missing values (geom_point).
melsyd_economy_2 %>%
gg_tsdisplay(difference(Sales, 7),
plot_type='partial', lag=36) +
labs(title="Seasonally differenced", y="")
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 7 rows containing missing values (geom_point).
melsyd_economy_2 %>%
gg_tsdisplay(difference(Sales, 28) %>% difference(),
plot_type='partial', lag=36) +
labs(title = "Double differenced", y="")
## Warning: Removed 29 row(s) containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_point).
melsyd_economy %>%
gg_tsdisplay(difference(Sales, 365),
plot_type='partial', lag=36) +
labs(title="Seasonally differenced", y="")
## Warning: Removed 366 row(s) containing missing values (geom_path).
## Warning: Removed 552 rows containing missing values (geom_point).
melsyd_economy %>%
gg_tsdisplay(difference(Sales, 7),
plot_type='partial', lag=36) +
labs(title="Seasonally differenced", y="")
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 191 rows containing missing values (geom_point).
melsyd_economy_2 %>%
gg_tsdisplay(difference(Sales, 28) %>% difference(),
plot_type='partial', lag=36) +
labs(title = "Double differenced", y="")
## Warning: Removed 29 row(s) containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_point).
# install.packages('fable.prophet')
library(fable.prophet)
## Loading required package: Rcpp
train <- melsyd_economy_2 %>%
filter(Date<"2015-06-19")
fit <- train %>%
model(
arima = ARIMA(Sales),
ets = ETS(Sales),
prophet = prophet(Sales ~ season(type = "multiplicative"))
)
fc <- fit %>% forecast(h = 42)
fc %>% autoplot(melsyd_economy_2 %>%
slice(n()-134:0)
)
melsyd_economy_2 %>%
filter(Date<"2015-06-19") %>%
model(NNETAR(Sales)) %>%
forecast(h = 42, times = 100) %>% # El parámetro times permite correr distintas simulaciones, cambian los intervalos de confianza
autoplot(melsyd_economy_2 %>%
slice(n()-134:0)) +
labs(x = "Year", y = "Counts",
title = "Yearly sunspots")
# Time series cross-validation accuracy
df_cv <- melsyd_economy_2 %>%
slice(0:(n()-85)) %>%
stretch_tsibble(.init = 731, .step = 42) %>% # Toma al menos los primeros dos años y va tomando de a 6 semanas para testear
relocate(Date, .id)
print(paste("Cantidad de folds: ", max(df_cv$.id)))
## [1] "Cantidad de folds: 4"
df_cv
rbind(
# TSCV accuracy
df_cv %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales),
Drift = RW(Sales ~ drift()),
ETS = ETS(Sales),
# Prophet = prophet(Sales ~ season(type = "multiplicative")),
Prophet = prophet(Sales),
# Redes_Neuronales = model(NNETAR(Sales))
)
%>%
forecast(h =42) %>%
accuracy(melsyd_economy_2) # Filtro sólo los días que estuvo abierto
# ,
# # Training set accuracy
# melsyd_economy_2 %>%
# model(RW(Sales ~ drift())) %>%
# accuracy()
)
# Time series cross-validation accuracy
df_cv <- melsyd_economy_2 %>%
mutate(
Sales = slider::slide_dbl(Sales, mean,
.before = 6, .after = 0, .complete = FALSE)) %>%
slice(0:(n()-85)) %>%
stretch_tsibble(.init = 731, .step = 42) %>% # Toma al menos los primeros dos años y va tomando de a 6 semanas para testear
relocate(Date, .id)
print(paste("Cantidad de folds: ", max(df_cv$.id)))
## [1] "Cantidad de folds: 4"
df_cv
rbind(
# TSCV accuracy
df_cv %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales),
Drift = RW(Sales ~ drift()),
ETS = ETS(Sales),
# Prophet = prophet(Sales ~ season(type = "multiplicative")),
Prophet = prophet(Sales),
# Redes_Neuronales = model(NNETAR(Sales))
)
%>%
forecast(h =42) %>%
accuracy(melsyd_economy_2 ) # Filtro sólo los días que estuvo abierto
# ,
# # Training set accuracy
# melsyd_economy_2 %>%
# model(RW(Sales ~ drift())) %>%
# accuracy()
)
# Time series cross-validation accuracy
df_cv <- melsyd_economy_2 %>%
mutate(
Sales = slider::slide_dbl(Sales, mean,
.before = 27, .after = 0, .complete = FALSE)) %>%
slice(0:(n()-85)) %>%
stretch_tsibble(.init = 731, .step = 11) %>% # Toma al menos los primeros dos años y va tomando de a 6 semanas para testear
relocate(Date, .id)
print(paste("Cantidad de folds: ", max(df_cv$.id)))
## [1] "Cantidad de folds: 12"
df_cv
rbind(
# TSCV accuracy
df_cv %>%
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales),
Drift = RW(Sales ~ drift()),
ETS = ETS(Sales),
# Prophet = prophet(Sales ~ season(type = "multiplicative")),
Prophet = prophet(Sales),
# Redes_Neuronales = model(NNETAR(Sales))
)
%>%
forecast(h = 42) %>%
accuracy(melsyd_economy_2 ) # Filtro sólo los días que estuvo abierto
# ,
# # Training set accuracy
# melsyd_economy_2 %>%
# model(RW(Sales ~ drift())) %>%
# accuracy()
)
melsyd_economy_2 %>%
slice(0:(n()-42)) %>%
model(
Prophet = prophet(Sales)
)%>%
forecast(h = 42) %>%
accuracy(melsyd_economy_2)
melsyd_economy_2 %>%
slice(0:(n()-42)) %>%
# filter(Open==1) %>%
model(
Prophet = prophet(Sales)
)%>%
forecast(h = 42) %>%
autoplot(melsyd_economy_2 %>%
# filter(Open==1) %>%
slice(n()-134:0)) +
labs(title = "Ventas últimas 18 semanas y predicciones con Prophet")
fit <- melsyd_economy_2 %>%
slice(0:(n()-42)) %>%
model(
prophet(Sales) )
fit %>%
components() %>%
autoplot()
fit %>% gg_tsresiduals()